/*****************************************************************************************
  Project   : The Impact of Unions on Non-union Wage Setting: Threats and Bargaining
  Authors   : David A. Green, Ben M. Sand, Iain G. Snoddy, Jeanne Tschopp
  Date      : August 2025
  Output    : Creates data used for estimation of Tables 1, 2, and 3 
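  Notes     : This file takes four positional arguments:
			: `1' demographic group, given as an expression that is appended to the -if- condition selecting the sample
            : `2' name of the group (used in input and output file names)
			: `3' transition case (used in input and output file names)
			: `4' years of experience at which wage trends are evaluated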
*****************************************************************************************/

// Session setup: no output paging, empty data in memory.
set more off
clear
set matsize 600
tempfile temp



// Experience level (argument 4) at which wage trends are evaluated, and its square.
local pexp  = `4'
local pexp2 = (`pexp')^2

// Build the industry transition data (arguments 1 and 2 are passed through).
do "${td}/Job_Transitions_pub.do" "`1'"  "`2'"	

//
// Regression-adjusted wages
//

tempfile temp

foreach y of numlist 1/5 {

	// Individual-level records for period y: earnings sample, restricted by the
	// demographic condition passed as argument 1
		use ${wd}/cps/cps_wd.dta if  earnsamp == 1 & year2==`y' & `1' , clear

		// require an observed wage and an observed union status
		keep if wage != . & union != .

		// cell weight totals, taken before observations with alloc == 1 are removed
		egen eta = sum(wgt), by(${area} ${ind} union)
		drop if alloc == 1

		// the wage measure used from here on is lnhrw_cpi
		replace wage = lnhrw_cpi
		replace wage = . if alloc == 1

		// remove records without an area code; capture lets the run continue
		// if the comparison itself raises an error
		capture drop if ${area} == .

		keep ${area} ${ind} year year2 state ///
			wage wgt lswt eta alloc union ///
			female black hispanic married age educ yearsch_1 pexp pexp2

		// one identifier per area x industry x union cell
		egen AxIxU = group(${area} ${ind} union)
		
		// EF groups: when the group name contains "EF_", rescale the sampling
		// weight by an age kernel times an education mix
		if (regexm("`2'", "EF_")) {

			// education mixes shared by the young and the old groups
			local low_mix  (.6 * (educ == 3) + .7 * (educ == 1) + 1 * (educ == 2))
			local high_mix (.4 * (educ == 3) + 1 * (educ == 4))

			// quadratic kernel centred on age 27.5, zero when 20 or more years away
			// (also zero when age is missing)
			gen yw = cond(abs(27.5 - age) < 20, 1 - (abs(27.5 - age) / 20)^2, 0)

			gen young_low  = yw * `low_mix'
			gen young_high = yw * `high_mix'

			// same kernel centred on age 45
			gen ow = cond(abs(45 - age) < 20, 1 - (abs(45 - age) / 20)^2, 0)

			gen old_low  = ow * `low_mix'
			gen old_high = ow * `high_mix'

			// the group name without its "EF_" prefix names the factor to apply
			local x = subinstr("`2'", "EF_", "", .)

			replace wgt = wgt  * `x'

			// keep observations that still carry a positive, non-missing weight
			keep if wgt > 0 & !missing(wgt)
			}
		
		// Gender groups		  
		// When the group name contains "Men" or "Women", female is left out of
		// the wage regression and the margins are taken by education only;
		// otherwise (else branch) female enters and margins are by education x female.
		if (regexm("`2'", "Men") | regexm("`2'", "Women")) {
			
		// Education proportions
		// Computed only in the first period (y == 1): weight share of each
		// education level, recovered as the coefficients of a no-constant
		// regression of the share on education dummies and stored in matrix p.
		if `y' == 1 {
			cap drop temp
			egen temp = sum(wgt), by(educ)
			egen total = total(wgt)
			replace temp = temp / total
			
			reg temp ibn.educ, noc
			mat p = e(b)
			
			drop temp total
		}
			
			// Weighted wage regression absorbing the area x industry x union
			// cell identifier AxIxU
			areg wage i.educ##(black  hispanic c.pexp c.pexp2 i.year) ///
				   c.pexp#(black hispanic i.year)                ///
				   c.pexp2#(black hispanic i.year) ///
				   [aw=wgt], ab(AxIxU)
				   
			// Predictive margins by education at the requested experience level,
			// with black and hispanic set to 0; the full results table goes to g
			margins educ, at(pexp = `pexp' black = 0 hispanic = 0 pexp2 = `pexp2')
			mat g = r(table)	

				   }
		else {
		
		// education proportions
		// Same as above, but shares are by education x female
		if `y' == 1 {
			cap drop temp
			egen temp = sum(wgt), by(educ female)
			egen total = total(wgt)
			replace temp = temp / total
			
			reg temp ibn.educ#ibn.female, noc
			mat p = e(b)
			
			drop temp total
			
		}
		
			// Weighted wage regression with female added, absorbing AxIxU
			areg wage i.educ##(black female hispanic c.pexp c.pexp2 i.year) ///
				   c.pexp#(black female hispanic i.year)                ///
				   c.pexp2#(black female hispanic i.year) ///
				   [aw=wgt], ab(AxIxU)
				   
			// Margins by education x female; noestimcheck is passed here only
			margins educ#female, at(pexp = `pexp' black = 0 hispanic = 0 pexp2 = `pexp2')  noestimcheck 
			mat g = r(table)	

				   }
		
		// areg's "d" prediction for the estimation sample, stored as res
		// (per the areg documentation this is the estimated effect of the
		// absorbed variable, here the AxIxU cell)
		predict double res if e(sample), d
		cap drop year2

		// keep the first row of the margins results table
		mat g = g[1,1...]

		// stack proportions (p) and margins (g) so their columns line up
		mat rowjoin h = p g
		mat p = h[1,1...]  // group proportions
		mat g = h[2,1...]  // margins

		// proportion-weighted average of the margins, returned to Stata as gg
		mata {
			W = st_matrix("p")'
			A = st_matrix("g")'
			sum(W)
			W = W / sum(W)
			sum(W)
			B = mean(A, W)
			st_matrix("gg", B)
			}

		// cell wage = average margin + cell effect
		gen temp = gg[1,1]
		gen reswage = temp + res  if e(sample)
		drop temp

		// Union density: weighted mean of union over the estimation sample
		egen union_density = wtmean(union) if e(sample), weight(wgt)

		// Cell observation counts and summed weights (taken before the
		// non-sample observations are removed)
		bysort ${area} ${ind} union: generate sample_ic = _N
		egen reg_wgt = sum(wgt), by(${ind} ${area} union)

		// continue with estimation-sample observations only
		keep if e(sample)
		
		
		// RTW: right-to-work indicator by state code and year.
		// NOTE(review): the codes look like two-digit Census state codes listed
		// in alphabetical order of state name -- confirm against the coding of
		// state in cps_wd.dta.
		// Fix: code 57 used to be assigned twice. Its first occurrence sat between
		// 88 and 44, the slot that the alphabetical pattern gives to 56
		// (presumably North Carolina), so that entry is now 56; 57 is kept once.
		// NOTE(review): the slot between 32 and 61 holds only 47; code 42
		// (presumably Iowa) does not appear -- confirm the omission is intended.
		gen RTW = 0

		// coded as right-to-work in every year
		replace RTW = 1  if inlist(state, 63, 86, 71, 59, 58, 47, 64, 46, 88)
		replace RTW = 1  if inlist(state, 56, 44, 57, 45, 62, 87, 54, 83)

		// coded as right-to-work from the given year onward
		replace RTW = 1  if state == 82 & year >= 1985
		replace RTW = 1  if state == 32 & year >= 2012
		replace RTW = 1  if state == 61 & year >= 2017
		replace RTW = 1  if state == 72 & year >= 1976
		replace RTW = 1  if state == 34 & year >= 2012
		replace RTW = 1  if state == 73 & year >= 2001
		replace RTW = 1  if state == 74 & year >= 1993
		replace RTW = 1  if state == 35 & year >= 2015
		replace RTW = 1  if state == 55 & year >= 2016
		
		// Collapse to one row per area x industry x union cell (cell means)
		collapse (mean) sample_ic* res reswage reg_wgt  eta RTW union_density, by(${area} ${ind} union)

		g year2=`y'
	
		keep ${area} ${ind} year2 sample_ic* reswage reg_wgt union  RTW  union_density //prem
				
		// Stack onto the periods processed so far. The stack file does not exist
		// on the first pass, so the append is skipped then; on later passes a
		// failing append now stops the run instead of being silently ignored
		// (a swallowed error would let the save below overwrite earlier periods).
		capture confirm file "`temp'"
		if _rc == 0 {
			append using `temp'
		}
		save `temp', replace
		
}



// step 2 industry premia
//   The period variable is written out as year2. No variable called year is
//   left after the collapse/keep inside the period loop, so the earlier
//   spelling "year" only resolved through Stata's variable-name abbreviation
//   and stops with an error under "set varabbrev off".
	egen union_ind = group($ind union)
	// period x (industry-union) effects are saved as prem
	reghdfe reswage [aw= reg_wgt], ab( prem = i.year2#i.union_ind  )	 
	// add the regression constant to the saved effect
	replace prem = prem + _b[_cons]
	
// Industry premiums: one value per period x industry-union group
	egen temp = mean(prem), by(year2 union_ind)
	replace prem = temp
	drop temp

// Union premium for subsamp table (first period only, industry absorbed)
	areg prem union [aw=reg_wgt] if year2 == 1, ab(detind) 
	gen union_premium = _b["union"]

// The former "replace res = reswage" is removed. No variable res exists at
// this point, so res abbreviated to reswage and the statement replaced
// reswage with itself. Later references to res still resolve to reswage
// through abbreviation.
	
	
//
// rectangularize data and reshape wide
//
	// year2 is spelled out: the data built by the period loop contains no
	// variable named year, so "year" only worked as an abbreviation of year2
	fillin year2 $area $ind union
	drop union_ind _fillin 
	
// minor cleanup
	// replace missing with zeros (cells created by fillin)
	foreach var in reg_wgt sample_ic {
		replace `var' = 0 if `var' == . 
	}
	// replace missing with national level: the mean of prem within
	// period x industry x union
	egen temp = mean(prem), by(year2 $ind union)
	replace prem = temp if prem == .
	drop temp

//
// shares
//
// year2 is spelled out throughout: the data at this point contains no variable
// named year, so "year" only worked as an abbreviation of year2.

	// change to eta to include non-allocated in counts
	// NOTE(review): eta is not among the variables kept at the end of the
	// period loop, so it would have to be retained there before it can be used here.
	local x reg_wgt

	// industry share within union-city
 	egen n_ic = sum(`x'), by(year2 $area union)
	replace n_ic = `x' / n_ic
	
	// job share within city
	egen n_jc = sum(`x'), by(year2 $area)
	replace n_jc = `x' / n_jc
	
	// union share within city
	egen n_tc = sum(`x'), by(year2 $area union)
	egen temp = sum(`x'), by(year2 $area)
	replace n_tc = n_tc / temp
	
	drop temp
	
	
//
// Predicted shares
//
// Each predicted share scales last period's local count by the national growth
// of the relevant group and then renormalises within the city.
// year2 is spelled out: the data at this point contains no variable named
// year, so "year" only worked as an abbreviation of year2.

	egen  ic = group($ind $area union)
	tsset ic (year2), yearly
	
	// national job count (industry x union)
	egen N_j = sum(`x'), by(year2 $ind union)
	
	// predicted job share
	gen temp = L.`x' * ( N_j / L.N_j )
	egen temp2 = sum(temp), by(year2  $area )
	gen pn_jc = temp / temp2
	drop temp* N_j
	
	// national union count
	egen N_j = sum(`x'), by(year2 union)
		
	// predicted union share
	egen temp = sum(`x'), by(year2 $area union)
	gen temp2 = L.temp * ( N_j / L.N_j )
	egen temp3 = sum(temp2), by(year2  $area detind )
	gen  pn_tc = temp2 / temp3
	drop temp* N_j
		
	
	// national industry count (summed over union status)
	egen N_j = sum(`x'), by(year2 detind)
		
	// predicted ind share
	gen temp = L.`x' * ( N_j / L.N_j )
	egen temp2 = sum(temp), by(year2  $area union )
	gen pn_ic = temp / temp2
	drop temp* 	

	drop N_j	
	drop ic

//
// Transitions
//


// step 1: write the cell-level data twice -- a scratch copy that the steps
//         below reload, and the decomposition output file
	save "${wd}/temp_ind_trans.dta", replace
	save "${wd}/decomp_out_${ind}_${area}_`2'_case_`3'.dta", replace

// step 2: destination-side copy of the cells, with industry and union renamed
//         to the "_2" names used as join keys in step 3
	tempfile ind_ind
	use  "${wd}/temp_ind_trans.dta", clear

	keep $area $ind year2 union n_jc pn_jc n_ic n_tc pn_tc pn_ic res prem 
	rename ($ind union) (detind_2 union_2)
	save `ind_ind'

// step 3: origin cells -> transition records -> destination cell values,
//         keeping matched records only at each join
	use  "${wd}/temp_ind_trans.dta", clear
	 
	keep $area $ind union year2 

	joinby year2 detind  union using ${wd}/ipums_ind_trans/transitions_`2'_case_`3'.dta, unmatched(both)
	tab _merge
	drop if _merge != 3
	drop _merge

	joinby year2 $area detind_2 union_2 using `ind_ind', unmatched(both)
	tab _merge
	drop if _merge != 3
	drop _merge



//
// Generate Relative Costs / options / IVs
//

	// scratch names for the numerator terms and their within-group sums
	tempvar term total


// Relative costs: phi_tt * phi_ii divided by its n_jc-weighted sum within
// origin job x area x period
	gen double `term' = phi_tt * phi_ii * n_jc
	egen `total' = sum( `term' ), by(job $area year2)
	gen double Xi =  (phi_tt * phi_ii) / `total'
	drop `term' `total'

// outside options, split by the union status of the destination
	gen E_1c = Xi * n_jc * res if union_2 == 1
	gen E_0c = Xi * n_jc * res if union_2 == 0
	gen E_c  = Xi * n_jc * res

	gen N_to_U = Xi * n_jc if union_2==1 & union == 0

// Instruments 
	// NOTE(review): "year" is not created under that name anywhere in this file;
	// it resolves to year2 by abbreviation unless the transitions file supplies
	// a variable called year -- confirm.
	egen ic = group($area job job_2) 
	tsset ic (year), yearly
	
// Predicted relative costs: same construction with predicted shares,
// excluding own-job pairs
	gen  double `term' = phi_tt * phi_ii * pn_jc  if job!=job_2
	egen `total' = sum( `term' ), by(job $area year2) 
	gen  double Xihat =  (phi_tt * phi_ii) / `total'  if job!=job_2
	drop `term' `total'

	// actual shares, excluding own-job pairs
	gen  double `term' = phi_tt * phi_ii * n_jc if job!=job_2
	egen `total' = sum( `term' ), by(job $area year2)
	gen  double Xi_loo =  (phi_tt * phi_ii) / `total' if job!=job_2
	drop `term' `total'


	// "hat" terms use current predicted values, "tilde" terms use lagged actual values
	gen E_hat_n   = Xihat    * pn_jc  * prem    if union_2 == 1
	gen E_tilde_n = L.Xi_loo * L.n_jc * L.prem  if union_2 == 1

	gen E_hat_u   = Xihat    * pn_jc * prem     if union_2 == 0
	gen E_tilde_u = L.Xi_loo * L.n_jc * L.prem  if union_2 == 0
 
	gen E_hat     = Xihat    * pn_jc  * prem
	gen E_tilde   = L.Xi_loo * L.n_jc * L.prem

	drop Xihat

// Go back to job city year obs level
// NOTE(review): collapse (sum) returns 0, not missing, for a group whose values
// are all missing (e.g. the lag-based E_tilde terms in the first period) --
// confirm that downstream use expects this.
collapse (sum) E* N_to_U , by(year2 $area detind union)	// N_to_U IV*

// Save to merge back into original data 
tempfile temp
save `temp', replace

use  "${wd}/temp_ind_trans.dta", clear
merge 1:1 year2 $area $ind union using `temp'
tab _merge
drop _merge  

// save for union-wage data program
save ${wd}/reg_union_${ind}_${area}_`2'_case_`3'.dta, replace

// reshape for non-union wages: one row per period x industry x area, with
// union status as the wide suffix.
// The period identifier is written out as year2: the data in memory comes
// from temp_ind_trans.dta plus the collapsed E_* and N_to_U columns, none of
// which is named year, so "year" only worked as an abbreviation of year2.
reshape wide reswage union_density sample_ic reg_wgt prem union_premium n_* pn*  E_* RTW N_to_U , i( year2 $ind ${area} ) j(union) // N_to_U  IV_*
// non-union (suffix 0) columns lose their suffix ...
renvars *0, postdrop(1)
// ... and the union (suffix 1) E_ columns get the suffix u instead
renvars E_c1 E_c E_1c1 E_0c1 E_hat_n1 E_tilde_n1 E_hat_u1 E_tilde_u1 E_hat1 E_tilde1, postsub(1 u) 


// Instruments: predicted minus lagged-actual option terms
	gen IV_En   = E_hat - E_tilde

// Union instruments
	gen IV_Eu  = E_hatu - E_tildeu

// Bring in Union Proportion (rows found only in the using file are not kept)
	merge 1:1 ${ind} ${area} year2 using  ${wd}/keyvars/propic.dta, keep(master match) nogenerate

// Bring in ER data (rows found only in the using file are not kept)
	merge m:1 ${area} year2 using  ${wd}/keyvars/emp.dta, keep(master match) nogenerate

// new at city-ind
// Unlike the two merges above, rows found only in the using file stay in the data.
	merge 1:1 ${ind} ${area} year2 using  ${wd}/keyvars/union_elections_${area}_${ind}.dta, nogenerate


// unit x time: panel is area x industry
	egen ic = group($area $ind) 
	tsset ic (year), yearly
	tsset


// Emulation
	gen P  = win_frac_ic_5
	gen LP = L.P
	gen dP = P - LP
	
	// option terms weighted by (1 - P)
	gen PE_n   = ( 1 - P ) * E_c
	gen PE_1n  = ( 1 - P ) * E_1c
	gen PE_0n  = ( 1 - P ) * E_0c
	
	// from union: option terms weighted by P
	gen PE_u  =  P * E_cu
	gen PE_1u =  P * E_1cu
	gen PE_0u =  P * E_0cu
		
	// vars: ratios to the previous period, and lagged RTW
	gen y =  ( win_frac_ic_5 / L.win_frac_ic_5 )
	gen x1 = ( lo_Elect_Estab_i_5 / L.lo_Elect_Estab_i_5 )
	gen x2 = ( lo_Elect_Estab_c_5 / L.lo_Elect_Estab_c_5 )
	gen x3 = L.RTW
		  
	// NOTE(review): a missing fraction compares as greater than .5 in Stata, so
	// sen_rep and hs_rep equal 1 where the fraction is missing -- confirm this
	// is the intended treatment.
	gen sen_rep  = m_senate_rep_frac >.5
	gen hs_rep   = m_house_rep_frac  >.5
	gen gov_rep  = m_govparty_c2 == -1
	gen rep_cont =  sen_rep + hs_rep + gov_rep
		
	// predict the ratio y, then turn it into a predicted level using lagged P
	ivreg2  y  c.x1##c.x2 c.x3  3.rep_cont  [aw=reg_wgt], noconstant  
		
	predict P_hat 
	replace P_hat = P_hat *  LP
	
	// IVs: predicted-P-weighted predicted terms minus lagged-P-weighted
	// lagged-actual terms (LP holds L.P)
	gen IV_n  = ( 1 - P_hat ) * E_hat   - ( 1 - LP ) * E_tilde
	gen IV_1n = ( 1 - P_hat ) * E_hat_n - ( 1 - LP ) * E_tilde_n
	gen IV_0n = ( 1 - P_hat ) * E_hat_u - ( 1 - LP ) * E_tilde_u		
			
	gen IV_u  = ( P_hat ) * E_hatu   - LP * E_tildeu		
	gen IV_1u = ( P_hat ) * E_hat_nu - LP * E_tilde_nu
	gen IV_0u = ( P_hat ) * E_hat_uu - LP * E_tilde_uu	
	

	drop  y x1 x2 

* Save version for appendix figure (group "All", transition case 0 only)
if ("`2'" == "All") & (`3' == 0) {
	save ${wd}/reg_dataRR3_app_fig_${ind}_${area}_`2'_case_`3'.dta, replace
	}

// Sample and changes
// Keep cells with more than 10 observations. Stata treats a missing value as
// larger than any number, so the bare comparison sample_ic > 10 also kept rows
// whose sample_ic is missing (for example rows that exist only in the
// union-elections file, whose merge retains unmatched using observations).
// Those rows are now excluded explicitly.
keep if sample_ic > 10 & !missing(sample_ic)

// In changes: first differences over the tsset time variable
	tsset
		foreach var of varlist  E_*  PE* P_* reswage1 cert* { 
			gen d`var' = D.`var'
		}	
	
gen dwage   = D.reswage
gen dpropic = D.propic
gen dER     = D.lnEP_c

// Weights
//*********************************************************
// Built from the cell counts of the current and the preceding observation of
// the same ic, then normalised to sum to one within each period.
// NOTE(review): [_n-1] is the previous remaining observation of the ic, which
// after the sample restriction above need not be the previous period, whereas
// the D. differences use the tsset time lag -- confirm this is intended.
bysort ic (year): gen weight = 1/(1/sqrt(sample_ic)+1/sqrt(sample_ic[_n-1]))
egen sw = sum(weight), by(year2)
replace weight = weight/sw
drop sw

// Keep industries that are observed more than once in each period
bys ${ind} year2: g num=_N
keep if num>1
drop num

// Controls and Cluster
//*********************************************************
// period x industry groups (controls) and period x area groups (clusters)
egen controls = group(year2 ${ind})
egen clusters = group(year2 ${area})

// Labels
//**********************************************************
// LaTeX labels for the regression tables

label variable dER     "$\Delta ER_c$"
label variable dpropic "$ \Delta P_{ic} $"

label variable dPE_n   "$ \Delta\left((1-\PUN) \cdot E_{nict} \right) $"
label variable dPE_1n  "$ \Delta\left((1-\PUN) \cdot E_{uct|n i} \right) $"
label variable dPE_0n  "$ \Delta\left((1-\PUN)  \cdot E_{nct|n i} \right) $"
label variable dPE_u   "$ \Delta\left(\PUN \cdot E_{uict} \right) $"

// final estimation data set
save ${wd}/reg_dataRRv3_${ind}_${area}_`2'_case_`3'.dta, replace


